Data from Cytation experiment performed 2020-12-14

Combined luminescence and imaging reads of several SCLC and melanoma cell lines treated with various drugs and concentrations.

NOTE: Some contamination (bacterial? fungal?) was visible in some cell culture vessels prior to experiment (per Clayton Wandishin).

library(diprate)
Registered S3 method overwritten by 'data.table':
  method           from
  print.data.table     
# closestTime: for each element of `mytime`, find the entry of `timevec`
# closest to it.
#
# Arguments:
#   mytime    Query times (anything difftime() accepts, e.g. POSIXct values or
#             "YYYY-mm-dd HH:MM:SS" strings).
#   timevec   Candidate times to search.
#   direction "before" considers only candidates at or before the query time,
#             "after" only candidates at or after it; any other value
#             (default "") uses the absolute difference.
#   out       "pos" (default) returns the index into `timevec`, "time" the
#             matching element of `timevec`, "amt" the size of the gap in
#             seconds.
#
# Returns one value per element of `mytime`; NA where no candidate qualifies
# (nothing on the requested side of the query, or the query itself is NA).
# When several candidates are equally close the first one in `timevec` wins,
# so the result is always an atomic vector that can be used as an index.
closestTime <- function (mytime, timevec, direction = "", out = "pos")
{
    out <- match.arg(out, c("pos", "time", "amt"))
    directional <- direction %in% c("before", "after")
    sapply(mytime, function(mt)
    {
        # Signed gap (timevec - mt) in fixed units: difftime()'s default
        # "auto" units can differ from one query time to the next.
        gap <- as.numeric(difftime(timevec, mt, units = "secs"))
        difft <- if(!directional) abs(gap) else if(direction == "before") -gap else gap
        # Candidates on the wrong side of the query are not eligible
        if(directional) difft[difft < 0] <- NA
        # which.min() skips NA and reports only the first minimum
        best <- which.min(difft)
        if(length(best) == 0) return(NA)
        switch(out,
               pos = best,
               # Kept from the original: directional searches return the bare
               # value, undirected searches return a character string.
               time = if(directional) as.vector(timevec[best]) else as.character(timevec[best]),
               amt = difft[best]
               )
    })
}

# getDateTime: parse the acquisition timestamp embedded in file/directory names.
#
# A timestamp is six date digits (yymmdd) followed by six time digits (HHMMSS),
# optionally separated by underscores, anywhere in the name. Names without such
# a timestamp are dropped, so the result can be shorter than `dname`.
#
# Arguments:
#   dname  Vector of names containing timestamps.
#
# Returns an unnamed POSIXlt vector (local time zone), one element per name
# that contains a timestamp, in the original order.
getDateTime <- function(dname)
{
    stamp_re <- "[0-9]{6}_*[0-9]{6}"
    dname <- as.character(dname)
    o <- dname[grepl(stamp_re, dname)]
    # Pull out just the matched timestamp so its position in the name and the
    # presence/absence of the underscore no longer matter
    stamp <- gsub("_", "", regmatches(o, regexpr(stamp_re, o)))
    mytime <- strptime(stamp, '%y%m%d%H%M%S')
    names(mytime) <- NULL
    return(mytime)
}

Data assembly

Data were not completely compiled into a single dataframe for analysis. Must assemble prior to analysis.

# Directory holding every input file for this run
TOPDIR <- "../data/20201216_Lum_Image_Run/"

# Momentum log: tab-delimited, no header row, first 5 lines skipped
ml_path <- file.path(TOPDIR,'MOMENTUM_LOGS_FOR_V004831B_20201214_100msRFP_10msGFP_20201216_153632_0.txt')
ml <- read.delim(ml_path, header=FALSE, skip=5, as.is=TRUE)
# Rows whose action column (V6) is 'Read'
read_i <- which(ml$V6=='Read')
# Keep the six relevant columns and give them descriptive names
ml <- setNames(ml[read_i,c('V1','V6','V7','V12','V13','V14')],
               c('instrument','action','id','start.time','end.time','barcode'))
rownames(ml) <- NULL

# Luminescence table; the first CSV column supplies the row names
lum <- read.csv(file.path(TOPDIR,'CombinedLuminescenceTOTAL.csv'), as.is=TRUE, row.names=1)

# Cell counts = cc, sorted by plate and then well
cc <- read.csv(file.path(TOPDIR,'Cellcounts_20201214.csv'), as.is=TRUE)
cc <- cc[order(cc$plate_id,cc$well),]
# standardize colnames: underscores become dots
colnames(cc) <- chartr("_",".",colnames(cc))

Data munging

Luminescence data already has most annotation associated with it, including plate barcode (ID), cell line, drug, drug conc, time and well. The times were parsed from the filenames of the raw luminescence data (see [../data/20201216_Lum_Image_Run/MultiPlateMultiFileMergeLum20201216_special.py])

Momentum log should have times matching (or very similar to) these values. Must make sure they’re in the same format. Use the closestTime function to obtain the position, within a vector of candidate times, of the time closest to each query time.

The cell counts were obtained from two different positions within each well. These values should be summed, as should the counts of nuclei that are positive in the second channel (the dead-cell stain; ch2_pos).

# Time matching: attach an acquisition time and a plate barcode to every
# cell-count row, collapse the per-well image positions into one row per
# plate/well, and build lookup keys on the luminescence table.

# Cell count image file path =  ccifp; from task arguments sent to py-seg image processing pipeline
ccifp <- read.csv(file.path(TOPDIR,'TaskArgs_20201214.csv'))$nuc_im_path
# Find times for each unique plate: drop the fixed image root and keep the
# first remaining path component (presumably the per-plate image directory)
ccifp <- gsub("/mnt/monica/quaranta2/Cytation/2020-12-14/images/","",ccifp)
ccifp <- unique(sapply(strsplit(ccifp, "/"), "[[", 1))

# get plate IDs from number after "Experiment"; should match Momentum log
pid <- as.integer(sapply(strsplit(ccifp,"Experiment"), "[[", 2))
# the time parsed from each directory name becomes the name of its plate ID
names(pid) <- getDateTime(ccifp)

cc$image.time <- names(pid)[match(cc$plate.id,pid)]

# standardize lum colnames
colnames(lum) <- gsub("_",".",tolower(colnames(lum)))
if(!any(grepl("drug1",colnames(lum)))) colnames(lum) <- gsub("drug","drug1",colnames(lum))
colnames(lum)[colnames(lum)=="tothour.lum"] <- "time"
colnames(lum)[colnames(lum)=="datetime.lum"] <- "lum.time"

# ulfst = unique lum file save times
ulfst <- unique(lum$lum.time)

# plate name (barcode from Momentum log)
# pn <- ml[closestTime(unique(cc$image.time),ml[grepl('RFP',ml$id),'start.time.rfmt']),'barcode']
# plate name (from lum)
# closestTime() returns positions within ulfst, not row numbers of lum, so
# translate position -> lum time -> first lum row carrying that time.
pn_pos <- closestTime(unique(cc$image.time),ulfst,direction="before")
pn <- lum$plate.name[match(ulfst[pn_pos],lum$lum.time)]
names(pn) <- unique(cc$image.time)

# add plate.name (barcode) to cc
cc$plate.name <- pn[match(cc$image.time,names(pn))]

# must sum cell counts and ch2_pos per uid2 (unique for each time point)
# NOTE(review): uid already contains plate.id, so uid2 identifies the same
# plate/well groups as uid -- confirm whether image.time was intended here.
cc$uid <- paste(cc$plate.id,cc$well,sep="_")
cc$uid2 <- paste(cc$uid,cc$plate.id,sep="_")
# one row per uid2 (first occurrence); unique() below walks the groups in the
# same first-occurrence order, so the sums line up with these rows
cc_temp <- cc[!duplicated(cc$uid2),]
cc_temp$cell.count <- sapply(unique(cc$uid2), function(x) sum(cc[cc$uid2==x,'cell.count']))
cc_temp$ch2.pos <- sapply(unique(cc$uid2), function(x) sum(cc[cc$uid2==x,'ch2.pos']))

# time lookup table: cc image times as names, lum file save times as values
# time_lut <- closestTime(unique(cc_temp$image.time),ulfst,direction="before",out="time")

# Add luminescence read times; note that the first luminescence read is missing
# cc_temp$lum.time <- time_lut[cc_temp$image.time]
keep_cols <- c("plate.name","image.time","lum.time","well","cell.count","ch2.pos")
# remove unneeded columns from cc_temp
cc_temp <- cc_temp[,colnames(cc_temp) %in% keep_cols]
# first plate did not get luminescence read; must pull info from Momentum logs or remove
# (image times with no lum read at or before them have plate.name NA)
cc_temp <- cc_temp[!is.na(cc_temp$plate.name),]
# cc_temp$uid <- paste(cc_temp$plate.name,cc_temp$well,cc_temp$lum.time, sep="_")

# lookup keys: uid = lum read time + well, pid = barcode + lum read time
lum$uid <- paste(lum$lum.time, lum$well, sep="_")
lum$pid <- paste(lum$plate.name, lum$lum.time, sep="_")

# keep only unique times for each luminescence read
lum_times <- lum[!duplicated(lum$pid),]
# order the barcode levels by the time of each barcode's first row
lum_times$plate.name <- factor(lum_times$plate.name, 
                               levels=unique(lum_times$plate.name)[order(lum_times[!duplicated(lum_times$plate.name),"time"])])
lum_times <- lum_times[order(lum_times$plate.name,lum_times$time),]

NOTE: plate names and times do not match between imaging and lum

Try to use the Momentum log to reconcile. (This code is likely not necessary since matched times are already available in cc_temp.)

# Reconcile imaging and luminescence times through the Momentum log, then
# append the per-well cell counts to the luminescence table.
# Assumes Lum and RFP reads alternate in the log, with each plate's Lum read
# immediately preceding its RFP (imaging) read -- TODO confirm against the log.

# Parse log timestamps; POSIXct (not POSIXlt) is the class suited to data frame columns
ml$start.time.rfmt <- as.POSIXct(strptime(ml$start.time, format="%m/%d/%Y %r"))
ml$end.time.rfmt <- as.POSIXct(strptime(ml$end.time, format="%m/%d/%Y %r"))
# image time nearest (in either direction) to the start of each read
ml$image.time <- unique(cc$image.time)[closestTime(ml$start.time.rfmt,unique(cc$image.time))]

lum_rows <- grep("Lum",ml$id)
rfp_rows <- grep("RFP",ml$id)

# copy image.time from successive row (NA when the Lum read is the last row)
ml$image.time[lum_rows] <- ml$image.time[lum_rows+1]

# lum file time nearest (in either direction) to the end of each read
ml$lum.time <- unique(lum_times$lum.time)[closestTime(ml$end.time.rfmt,unique(lum_times$lum.time))]
# copy lum.time from preceding row; an RFP read in the first row has no
# predecessor and gets NA instead of shifting every later value
prev_rows <- rfp_rows-1
prev_rows[prev_rows < 1] <- NA
ml$lum.time[rfp_rows] <- ml$lum.time[prev_rows]

# lum read time for each cell-count row, via its image time
cc_temp$lum.time <- ml[match(cc_temp$image.time,ml$image.time),"lum.time"]
cc_temp$uid <- paste(cc_temp$lum.time, cc_temp$well, sep="_")
# reorder cell counts to the row order of lum (NA rows where nothing matches)
cc_temp <- cc_temp[match(lum$uid,cc_temp$uid),]
rownames(cc_temp) <- NULL

lum2 <- cbind(lum, cc_temp[,c('cell.count','ch2.pos','image.time')])

NOTE: first luminescence reading is missing

NOTE: in this dataset, luminescence values below ~2000 indistinguishable from background

Separate data by barcode (cell line)

Plate barcodes were saved as plate.name. Each barcode is associated with a different cell line.

# One data frame per plate barcode, in order of first appearance in lum2
by_plate <- local({
    barcodes <- unique(lum2$plate.name)
    setNames(lapply(barcodes, function(b) lum2[lum2$plate.name==b,]), barcodes)
})

# Three growth-curve panels per plate, drawn from its untreated wells
par(mfrow=c(4,3))
for (barcode in names(by_plate)) {
    plate <- by_plate[[barcode]]
    # control wells only (drug1.conc == 0), sorted by well then time
    cont <- plate[plate$drug1.conc==0,]
    cont <- cont[order(cont$well,cont$time),]
    # DMS53 is cut off at 48 h, every other line at 96 h
    max_time <- if(any(grepl("DMS53",cont$cell.line))) 48 else 96
    cont <- cont[cont$time <= max_time,]
    cl <- unique(cont$cell.line)
    # Draw one plotGC panel of `ycol` against time, one curve per well
    draw_panel <- function(ycol, title, ...) {
        gc_args <- getGCargs(cont, dat.col = c("time", ycol, "well"))
        invisible(do.call(plotGC, append(gc_args, list(main=title, ...))))
    }
    #do.call(plotGC, getGCargs(cont[cont$well %in% c('B11','C11','D11'),], 
    #                          dat.col = c("time", "cell.count", "well")))
    draw_panel("cell.count", paste(cl,"cell count doublings"))
    draw_panel("rlu", paste(cl,"lum doublings"))
    draw_panel("rlu", paste(cl,"lum"), count.type = "log")
}

Clayton’s heuristic

Clayton Wandishin states that the relationship between luminescence and cell count can be explained by the following mathematical model:

1 = d/dx(log2(J*Lum))

# One panel per cell line named in j: log2(rlu) * time / J against time
par(mfrow=c(1,3))
local({
    for (n in names(j)) {
        dat <- my_lum[my_lum$cell.line==n,]
        plot(dat$time,log2(dat$rlu)*dat$time/j[n])
    }
})

---
title: "RT-Glo Cytation 2020-12-14 analysis"
author: "Darren Tyson"
date: "03/10/2021"
output: html_notebook
---

#### Data from Cytation experiment performed 2020-12-14
Combined luminescence and imaging reads of several SCLC and melanoma cell lines treated with various drugs and concentrations.  

NOTE: Some contamination (bacterial? fungal?) was visible in some cell culture vessels prior to experiment (per Clayton Wandishin).  


```{r Setup}
library(diprate)

# closestTime: for each element of `mytime`, find the entry of `timevec`
# closest to it.
#
# Arguments:
#   mytime    Query times (anything difftime() accepts, e.g. POSIXct values or
#             "YYYY-mm-dd HH:MM:SS" strings).
#   timevec   Candidate times to search.
#   direction "before" considers only candidates at or before the query time,
#             "after" only candidates at or after it; any other value
#             (default "") uses the absolute difference.
#   out       "pos" (default) returns the index into `timevec`, "time" the
#             matching element of `timevec`, "amt" the size of the gap in
#             seconds.
#
# Returns one value per element of `mytime`; NA where no candidate qualifies
# (nothing on the requested side of the query, or the query itself is NA).
# When several candidates are equally close the first one in `timevec` wins,
# so the result is always an atomic vector that can be used as an index.
closestTime <- function (mytime, timevec, direction = "", out = "pos")
{
    out <- match.arg(out, c("pos", "time", "amt"))
    directional <- direction %in% c("before", "after")
    sapply(mytime, function(mt)
    {
        # Signed gap (timevec - mt) in fixed units: difftime()'s default
        # "auto" units can differ from one query time to the next.
        gap <- as.numeric(difftime(timevec, mt, units = "secs"))
        difft <- if(!directional) abs(gap) else if(direction == "before") -gap else gap
        # Candidates on the wrong side of the query are not eligible
        if(directional) difft[difft < 0] <- NA
        # which.min() skips NA and reports only the first minimum
        best <- which.min(difft)
        if(length(best) == 0) return(NA)
        switch(out,
               pos = best,
               # Kept from the original: directional searches return the bare
               # value, undirected searches return a character string.
               time = if(directional) as.vector(timevec[best]) else as.character(timevec[best]),
               amt = difft[best]
               )
    })
}

# getDateTime: parse the acquisition timestamp embedded in file/directory names.
#
# A timestamp is six date digits (yymmdd) followed by six time digits (HHMMSS),
# optionally separated by underscores, anywhere in the name. Names without such
# a timestamp are dropped, so the result can be shorter than `dname`.
#
# Arguments:
#   dname  Vector of names containing timestamps.
#
# Returns an unnamed POSIXlt vector (local time zone), one element per name
# that contains a timestamp, in the original order.
getDateTime <- function(dname)
{
    stamp_re <- "[0-9]{6}_*[0-9]{6}"
    dname <- as.character(dname)
    o <- dname[grepl(stamp_re, dname)]
    # Pull out just the matched timestamp so its position in the name and the
    # presence/absence of the underscore no longer matter
    stamp <- gsub("_", "", regmatches(o, regexpr(stamp_re, o)))
    mytime <- strptime(stamp, '%y%m%d%H%M%S')
    names(mytime) <- NULL
    return(mytime)
}
```

#### Data assembly
Data were not completely compiled into a single dataframe for analysis. Must assemble prior to analysis.

```{r}
# Directory holding every input file for this run
TOPDIR <- "../data/20201216_Lum_Image_Run/"

# Momentum log: tab-delimited, no header row, first 5 lines skipped
ml_path <- file.path(TOPDIR,'MOMENTUM_LOGS_FOR_V004831B_20201214_100msRFP_10msGFP_20201216_153632_0.txt')
ml <- read.delim(ml_path, header=FALSE, skip=5, as.is=TRUE)
# Rows whose action column (V6) is 'Read'
read_i <- which(ml$V6=='Read')
# Keep the six relevant columns and give them descriptive names
ml <- setNames(ml[read_i,c('V1','V6','V7','V12','V13','V14')],
               c('instrument','action','id','start.time','end.time','barcode'))
rownames(ml) <- NULL

# Luminescence table; the first CSV column supplies the row names
lum <- read.csv(file.path(TOPDIR,'CombinedLuminescenceTOTAL.csv'), as.is=TRUE, row.names=1)

# Cell counts = cc, sorted by plate and then well
cc <- read.csv(file.path(TOPDIR,'Cellcounts_20201214.csv'), as.is=TRUE)
cc <- cc[order(cc$plate_id,cc$well),]
# standardize colnames: underscores become dots
colnames(cc) <- chartr("_",".",colnames(cc))
```

#### Data munging
Luminescence data already has most annotation associated with it, including plate barcode (ID), cell line, drug, drug conc, time and well. The times were parsed from the filenames of the raw luminescence data (see [../data/20201216_Lum_Image_Run/MultiPlateMultiFileMergeLum20201216_special.py])  

Momentum log should have times matching (or very similar to) these values. Must make sure they're in the same format. Use the `closestTime` function to obtain the position, within a vector of candidate times, of the time closest to each query time.

The cell counts were obtained from two different positions within each well. These values should be summed, as should the counts of nuclei that are positive in the second channel (the dead-cell stain; `ch2_pos`).

```{r Time matching}
# Time matching: attach an acquisition time and a plate barcode to every
# cell-count row, collapse the per-well image positions into one row per
# plate/well, and build lookup keys on the luminescence table.

# Cell count image file path =  ccifp; from task arguments sent to py-seg image processing pipeline
ccifp <- read.csv(file.path(TOPDIR,'TaskArgs_20201214.csv'))$nuc_im_path
# Find times for each unique plate: drop the fixed image root and keep the
# first remaining path component (presumably the per-plate image directory)
ccifp <- gsub("/mnt/monica/quaranta2/Cytation/2020-12-14/images/","",ccifp)
ccifp <- unique(sapply(strsplit(ccifp, "/"), "[[", 1))

# get plate IDs from number after "Experiment"; should match Momentum log
pid <- as.integer(sapply(strsplit(ccifp,"Experiment"), "[[", 2))
# the time parsed from each directory name becomes the name of its plate ID
names(pid) <- getDateTime(ccifp)

cc$image.time <- names(pid)[match(cc$plate.id,pid)]

# standardize lum colnames
colnames(lum) <- gsub("_",".",tolower(colnames(lum)))
if(!any(grepl("drug1",colnames(lum)))) colnames(lum) <- gsub("drug","drug1",colnames(lum))
colnames(lum)[colnames(lum)=="tothour.lum"] <- "time"
colnames(lum)[colnames(lum)=="datetime.lum"] <- "lum.time"

# ulfst = unique lum file save times
ulfst <- unique(lum$lum.time)

# plate name (barcode from Momentum log)
# pn <- ml[closestTime(unique(cc$image.time),ml[grepl('RFP',ml$id),'start.time.rfmt']),'barcode']
# plate name (from lum)
# closestTime() returns positions within ulfst, not row numbers of lum, so
# translate position -> lum time -> first lum row carrying that time.
pn_pos <- closestTime(unique(cc$image.time),ulfst,direction="before")
pn <- lum$plate.name[match(ulfst[pn_pos],lum$lum.time)]
names(pn) <- unique(cc$image.time)

# add plate.name (barcode) to cc
cc$plate.name <- pn[match(cc$image.time,names(pn))]

# must sum cell counts and ch2_pos per uid2 (unique for each time point)
# NOTE(review): uid already contains plate.id, so uid2 identifies the same
# plate/well groups as uid -- confirm whether image.time was intended here.
cc$uid <- paste(cc$plate.id,cc$well,sep="_")
cc$uid2 <- paste(cc$uid,cc$plate.id,sep="_")
# one row per uid2 (first occurrence); unique() below walks the groups in the
# same first-occurrence order, so the sums line up with these rows
cc_temp <- cc[!duplicated(cc$uid2),]
cc_temp$cell.count <- sapply(unique(cc$uid2), function(x) sum(cc[cc$uid2==x,'cell.count']))
cc_temp$ch2.pos <- sapply(unique(cc$uid2), function(x) sum(cc[cc$uid2==x,'ch2.pos']))

# time lookup table: cc image times as names, lum file save times as values
# time_lut <- closestTime(unique(cc_temp$image.time),ulfst,direction="before",out="time")

# Add luminescence read times; note that the first luminescence read is missing
# cc_temp$lum.time <- time_lut[cc_temp$image.time]
keep_cols <- c("plate.name","image.time","lum.time","well","cell.count","ch2.pos")
# remove unneeded columns from cc_temp
cc_temp <- cc_temp[,colnames(cc_temp) %in% keep_cols]
# first plate did not get luminescence read; must pull info from Momentum logs or remove
# (image times with no lum read at or before them have plate.name NA)
cc_temp <- cc_temp[!is.na(cc_temp$plate.name),]
# cc_temp$uid <- paste(cc_temp$plate.name,cc_temp$well,cc_temp$lum.time, sep="_")

# lookup keys: uid = lum read time + well, pid = barcode + lum read time
lum$uid <- paste(lum$lum.time, lum$well, sep="_")
lum$pid <- paste(lum$plate.name, lum$lum.time, sep="_")

# keep only unique times for each luminescence read
lum_times <- lum[!duplicated(lum$pid),]
# order the barcode levels by the time of each barcode's first row
lum_times$plate.name <- factor(lum_times$plate.name, 
                               levels=unique(lum_times$plate.name)[order(lum_times[!duplicated(lum_times$plate.name),"time"])])
lum_times <- lum_times[order(lum_times$plate.name,lum_times$time),]
```

## NOTE: plate names and times do not match between imaging and lum
Try to use the Momentum log to reconcile. (This code is likely not necessary since matched times are already available in `cc_temp`.)
```{r Time matching part 2}
# Reconcile imaging and luminescence times through the Momentum log.
# Assumes Lum and RFP reads alternate in the log, with each plate's Lum read
# immediately preceding its RFP (imaging) read -- TODO confirm against the log.

# Parse log timestamps; POSIXct (not POSIXlt) is the class suited to data frame columns
ml$start.time.rfmt <- as.POSIXct(strptime(ml$start.time, format="%m/%d/%Y %r"))
ml$end.time.rfmt <- as.POSIXct(strptime(ml$end.time, format="%m/%d/%Y %r"))
# image time nearest (in either direction) to the start of each read
ml$image.time <- unique(cc$image.time)[closestTime(ml$start.time.rfmt,unique(cc$image.time))]

lum_rows <- grep("Lum",ml$id)
rfp_rows <- grep("RFP",ml$id)

# copy image.time from successive row (NA when the Lum read is the last row)
ml$image.time[lum_rows] <- ml$image.time[lum_rows+1]

# lum file time nearest (in either direction) to the end of each read
ml$lum.time <- unique(lum_times$lum.time)[closestTime(ml$end.time.rfmt,unique(lum_times$lum.time))]
# copy lum.time from preceding row; an RFP read in the first row has no
# predecessor and gets NA instead of shifting every later value
prev_rows <- rfp_rows-1
prev_rows[prev_rows < 1] <- NA
ml$lum.time[rfp_rows] <- ml$lum.time[prev_rows]
```



```{r Add cell counts to lum data}
# lum read time for each cell-count row, looked up through its image time
cc_temp$lum.time <- ml$lum.time[match(cc_temp$image.time,ml$image.time)]
# same key layout as lum$uid: lum read time + well
cc_temp$uid <- paste(cc_temp$lum.time, cc_temp$well, sep="_")
# put cell counts in the row order of lum (all-NA rows where nothing matches)
cc_temp <- cc_temp[match(lum$uid,cc_temp$uid),]
rownames(cc_temp) <- NULL

# lum plus the three imaging columns, bound side by side
count_cols <- cc_temp[,c('cell.count','ch2.pos','image.time')]
lum2 <- cbind(lum, count_cols)
```

## NOTE: first luminescence reading is missing

#### NOTE: in this dataset, luminescence values below ~2000 indistinguishable from background


#### Separate data by barcode (cell line)
Plate barcodes were saved as `plate.name`. Each barcode is associated with a different cell line.
```{r}
# One data frame per plate barcode, in order of first appearance in lum2
by_plate <- local({
    barcodes <- unique(lum2$plate.name)
    setNames(lapply(barcodes, function(b) lum2[lum2$plate.name==b,]), barcodes)
})
```


```{r Plot all control wells, fig.height=11, fig.width=8.5}
# Three growth-curve panels per plate, drawn from its untreated wells
par(mfrow=c(4,3))
for (barcode in names(by_plate)) {
    plate <- by_plate[[barcode]]
    # control wells only (drug1.conc == 0), sorted by well then time
    cont <- plate[plate$drug1.conc==0,]
    cont <- cont[order(cont$well,cont$time),]
    # DMS53 is cut off at 48 h, every other line at 96 h
    max_time <- if(any(grepl("DMS53",cont$cell.line))) 48 else 96
    cont <- cont[cont$time <= max_time,]
    cl <- unique(cont$cell.line)
    # Draw one plotGC panel of `ycol` against time, one curve per well
    draw_panel <- function(ycol, title, ...) {
        gc_args <- getGCargs(cont, dat.col = c("time", ycol, "well"))
        invisible(do.call(plotGC, append(gc_args, list(main=title, ...))))
    }
    #do.call(plotGC, getGCargs(cont[cont$well %in% c('B11','C11','D11'),], 
    #                          dat.col = c("time", "cell.count", "well")))
    draw_panel("cell.count", paste(cl,"cell count doublings"))
    draw_panel("rlu", paste(cl,"lum doublings"))
    draw_panel("rlu", paste(cl,"lum"), count.type = "log")
}

```

### Clayton's heuristic
Clayton Wandishin states that the relationship between luminescence and cell count can be explained by the following mathematical model:

`1 = d/dx(log2(J*Lum))`  


```{r}
# Luminescence up to 96 h; DMS53 is further limited to 48 h
my_lum <- lum2[lum2$time <= 96,c('cell.line','time','rlu')]
is_dms53 <- my_lum$cell.line=="DMS53"
dms53 <- my_lum[is_dms53 & my_lum$time <= 48,]
# all other cell lines first, then the truncated DMS53 rows
my_lum <- rbind(my_lum[!is_dms53,],dms53)

# J value used to scale each cell line in the heuristic plot
j <- setNames(c(18,14,12), c("H1048","H841","DMS53"))
```

```{r fig.height=3, fig.width=8.5}
# One panel per cell line named in j: log2(rlu) * time / J against time
par(mfrow=c(1,3))
local({
    for (n in names(j)) {
        dat <- my_lum[my_lum$cell.line==n,]
        plot(dat$time,log2(dat$rlu)*dat$time/j[n])
    }
})
```

